feat: Extend GPU Check utility to support more GPUs
- Refactored to a unified device listing function.
- Now checks every supported hardware acceleration device type and lists the devices for all of them, to give a deeper system analysis.
- Added Intel XPU support.
- Improved AMD ROCm support.
- Improved Apple MPS support.
This commit is contained in:
parent
6113567e94
commit
39a035d106
@ -1,47 +1,85 @@
|
||||
import torch
|
||||
|
||||
|
||||
def show_device_list(backend: str) -> int:
    """
    Displays a list of all detected devices for a given PyTorch backend.

    Args:
        backend: The name of the device backend module (e.g., "cuda", "xpu").

    Returns:
        The number of devices found if the backend is usable, otherwise 0.
    """
    backend_upper = backend.upper()

    # Vendor-facing display names for the known backends; anything else
    # falls back to the uppercased backend name.
    # NOTE: "cuda" also covers PyTorch's official AMD ROCm support, since
    # that's implemented inside the PyTorch CUDA APIs.
    # SEE: https://docs.pytorch.org/docs/stable/cuda.html
    display_names = {
        "cuda": "NVIDIA CUDA / AMD ROCm",
        "xpu": "Intel XPU",
        "mps": "Apple MPS",
    }
    brand_name = display_names.get(backend, backend_upper)

    try:
        # Get the backend module from PyTorch, e.g., `torch.cuda`.
        # NOTE: Backends always exist even if the user has no devices.
        backend_module = getattr(torch, backend)

        if not backend_module.is_available():
            print(f"PyTorch: No devices found for {brand_name} backend.")
            return 0

        print(f"PyTorch: {brand_name} is available!")

        # Show all available hardware acceleration devices.
        device_count = backend_module.device_count()
        print(f" * Number of {backend_upper} devices found: {device_count}")

        # NOTE: Apple Silicon devices don't have `get_device_name()` at the
        # moment, so we'll skip those since we can't get their device names.
        # SEE: https://docs.pytorch.org/docs/stable/mps.html
        if backend != "mps":
            for device_index in range(device_count):
                name = backend_module.get_device_name(device_index)
                print(f' * Device {device_index}: "{name}"')

        return device_count

    except AttributeError:
        print(
            f'Error: The PyTorch backend "{backend}" does not exist, or is missing the necessary APIs (is_available, device_count, get_device_name).'
        )
    except Exception as e:
        print(f"Error: {e}")

    return 0
def check_torch_devices() -> None:
    """
    Checks for the availability of various PyTorch hardware acceleration
    platforms and prints information about the discovered devices.
    """
    print("Scanning for PyTorch hardware acceleration devices...\n")

    # Probe every supported backend and tally the devices that were found:
    # "cuda" covers NVIDIA CUDA / AMD ROCm, "xpu" is Intel, and "mps" is
    # Apple Metal Performance Shaders.
    backends = ("cuda", "xpu", "mps")
    device_count = sum(show_device_list(backend) for backend in backends)

    if device_count > 0:
        print("\nHardware acceleration detected. Your system is ready!")
    else:
        print("\nNo hardware acceleration detected. Running in CPU mode.")
||||
if __name__ == "__main__":
    # Script entry point: run the full hardware acceleration scan when
    # executed directly (no effect when imported as a module).
    check_torch_devices()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user