feat: Extend GPU Check utility to support more GPUs

- Refactored to a unified device listing function.

- Now checks every supported hardware acceleration device type and lists the devices for each of them, providing a deeper analysis of the system.

- Added Intel XPU support.

- Improved AMD ROCm support.

- Improved Apple MPS support.
This commit is contained in:
Arcitec 2025-09-10 20:24:27 +02:00
parent 6113567e94
commit 39a035d106

View File

@ -1,47 +1,85 @@
import torch import torch
def show_device_list(backend: str) -> int:
    """
    Displays a list of all detected devices for a given PyTorch backend.

    Args:
        backend: The name of the device backend module (e.g., "cuda", "xpu").

    Returns:
        The number of devices found if the backend is usable, otherwise 0.
    """
    backend_upper = backend.upper()

    # Vendor brand names shown to the user for each known backend. Anything
    # not listed here falls back to the uppercased backend name.
    # NOTE: "cuda" also covers PyTorch's official AMD ROCm support, since
    # that's implemented inside the PyTorch CUDA APIs.
    # SEE: https://docs.pytorch.org/docs/stable/cuda.html
    vendor_brands = {
        "cuda": "NVIDIA CUDA / AMD ROCm",
        "xpu": "Intel XPU",
        "mps": "Apple MPS",
    }
    brand_name = vendor_brands.get(backend, backend_upper)

    try:
        # Look up the backend module on the torch package, e.g., `torch.cuda`.
        # NOTE: Backends always exist even if the user has no devices.
        backend_module = getattr(torch, backend)

        if not backend_module.is_available():
            print(f"PyTorch: No devices found for {brand_name} backend.")
            return 0

        print(f"PyTorch: {brand_name} is available!")

        # Show all available hardware acceleration devices.
        device_count = backend_module.device_count()
        print(f" * Number of {backend_upper} devices found: {device_count}")

        # NOTE: Apple Silicon devices don't have `get_device_name()` at the
        # moment, so we'll skip those since we can't get their device names.
        # SEE: https://docs.pytorch.org/docs/stable/mps.html
        if backend != "mps":
            for device_index in range(device_count):
                device_name = backend_module.get_device_name(device_index)
                print(f' * Device {device_index}: "{device_name}"')

        return device_count
    except AttributeError:
        print(
            f'Error: The PyTorch backend "{backend}" does not exist, or is missing the necessary APIs (is_available, device_count, get_device_name).'
        )
    except Exception as e:
        print(f"Error: {e}")

    # Reached only via the exception handlers above.
    return 0
def check_torch_devices() -> None:
    """
    Checks for the availability of various PyTorch hardware acceleration
    platforms and prints information about the discovered devices.
    """
    print("Scanning for PyTorch hardware acceleration devices...\n")

    # Probe every supported backend and tally the discovered devices:
    # NVIDIA CUDA / AMD ROCm, Intel XPU, and Apple Metal Performance
    # Shaders (MPS).
    probed_backends = ("cuda", "xpu", "mps")
    device_count = sum(show_device_list(backend) for backend in probed_backends)

    if device_count > 0:
        print("\nHardware acceleration detected. Your system is ready!")
    else:
        print("\nNo hardware acceleration detected. Running in CPU mode.")


if __name__ == "__main__":
    check_torch_devices()