README.md (2 additions, 2 deletions)
@@ -141,10 +141,10 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
 
 ```bash
 # Intel GPU with FP16 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16
 
 # Intel GPU with FP32 support
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
+docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32
 ```
 
 ### Vulkan GPU Images:
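The `card1`/`renderD128` names above are examples; DRI node numbering varies from host to host. Before reusing these commands, it is worth checking which nodes the host actually exposes (a quick check, not part of this PR):

```bash
# List the DRI device nodes on the host, then adjust the --device flags
# to match (some systems expose card0 and/or renderD129 instead).
ls -l /dev/dri
```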
backend/python/diffusers/backend.py (7 additions, 8 deletions)
@@ -38,9 +38,7 @@
 FRAMES = os.environ.get("FRAMES", "64")
 
 if XPU:
-    import intel_extension_for_pytorch as ipex
-
-    print(ipex.xpu.get_device_name(0))
+    print(torch.xpu.get_device_name(0))
 
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
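Dropping the `intel_extension_for_pytorch` import works because `torch.xpu` is exposed directly once XPU support is present (IPEX registers it when imported elsewhere, and recent upstream PyTorch ships it natively). A defensive way to probe for it, as a hedged sketch rather than what the backend itself does:

```python
import torch

# torch.xpu only exists when XPU support is available (via IPEX or a
# PyTorch build with native XPU support), so guard before touching it.
if hasattr(torch, "xpu") and torch.xpu.is_available():
    print(torch.xpu.get_device_name(0))
else:
    print("no XPU device visible to PyTorch")
```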
@@ -336,6 +334,8 @@ def LoadModel(self, request, context):
             request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
 
             device = "cpu" if not request.CUDA else "cuda"
+            if XPU:
+                device = "xpu"
             self.device = device
             if request.LoraAdapter:
                 # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
@@ -359,12 +359,11 @@ def LoadModel(self, request, context):
 
                 self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
 
-            if request.CUDA:
-                self.pipe.to('cuda')
+            if device != "cpu":
+                self.pipe.to(device)
                 if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
+                    self.controlnet.to(device)
 
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
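Taken together, these backend.py changes collapse the CUDA and XPU paths into one: pick the device string once, then move the pipeline (and optional ControlNet) with a single `.to(device)`. A self-contained sketch of that pattern (the pipeline setup and the `cuda_requested` flag are illustrative stand-ins, not the backend's actual request handling):

```python
import os

import torch
from diffusers import StableDiffusionPipeline

# run.sh exports XPU=1 when an Intel oneAPI install is detected.
XPU = os.environ.get("XPU", "0") == "1"

def pick_device(cuda_requested: bool) -> str:
    # Same precedence as the diff: XPU overrides a CUDA request, CPU is the fallback.
    if XPU:
        return "xpu"
    return "cuda" if cuda_requested else "cpu"

device = pick_device(cuda_requested=torch.cuda.is_available())
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
if device != "cpu":
    # One code path now covers both CUDA and XPU.
    pipe = pipe.to(device)
```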
backend/python/diffusers/run.sh (7 additions, 1 deletion)
@@ -6,4 +6,10 @@ else
     source $backend_dir/../common/libbackend.sh
 fi
 
-startBackend $@
+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
+startBackend $@
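Since `libbackend.sh` is sourced before this block runs, the detection happens inside the backend's own container environment. To verify what a given image will do before starting the backend, the same test can be run by hand (a one-liner for illustration, not part of the PR):

```bash
# Intel's oneAPI images ship their toolchain under /opt/intel, so its
# presence is used as a proxy for XPU support.
if [ -d "/opt/intel" ]; then echo "XPU=1 will be exported"; else echo "XPU stays unset"; fi
```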