Skip to content

GPU Discovery not detecting AMD MI3xx GPUs #11701

@dk-blackfuel

Description

@dk-blackfuel

problem

On a server with 8 AMD Instinct MI325X GPUs, the discovery script doesn't report any GPU:

root@gpu-31:/home/amd/Workarea/cloudstack-test# ./gpudiscovery.sh
{ "gpus": [

]}

even though lspci -nnm lists all eight devices:

root@gpu-31:/home/amd/Workarea/cloudstack-test# lspci -nnm | grep AMD | grep -i Acc
05:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
15:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
65:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
75:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
85:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
95:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
e5:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]"
f5:00.0 "Processing accelerators [1200]" "Advanced Micro Devices, Inc. [AMD/ATI] [1002]" "Aqua Vanjaram [Instinct MI325X] [74a5]" -rff -pff "" ""

The expected output would be:

{ "gpus": [
    {
      "pci_address":"05:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:05:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x05",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"15:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:15:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x15",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"65:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:65:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x65",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"75:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:75:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x75",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"85:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:85:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x85",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"95:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:95:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0x95",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"e5:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:e5:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0xe5",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }
,
    {
      "pci_address":"f5:00.0",
      "vendor_id":"1002",
      "device_id":"74a5",
      "vendor":"Advanced Micro Devices, Inc. [AMD/ATI]",
      "device":"Aqua Vanjaram [Instinct MI325X]",
      "driver":"amdgpu",
      "pci_class":"Processing accelerators [1200]",
      "iommu_group":"null",
      "pci_root":"0000:f5:00.0",
      "numa_node":-1,
      "sriov_totalvfs":0,
      "sriov_numvfs":0,
      "max_instances":null,
      "video_ram":null,
      "max_heads":null,
      "max_resolution_x":null,
      "max_resolution_y":null,

      "full_passthrough": {
        "enabled":1,
        "libvirt_address": {
          "domain":"0x0000",
          "bus":"0xf5",
          "slot":"0x00",
          "function":"0x0"
        },
        "used_by_vm":null
      },

      "vgpu_instances":[],
      "vf_instances":[]
    }

]}

versions

GPU: AMD Instinct MI325X
CloudStack: 4.21

The steps to reproduce the bug

  1. SSH to a server that has MI3xx GPUs installed
  2. Run the gpudiscovery.sh script on that server

What to do about it?

Patch incoming

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

Projects

Status

Done

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions