Intel Gpu support initialization (#4340)

2023-10-27 08:09:51 +05:30 · 2023-10-27 08:09:51 +05:30 · 778a010df8
commit 778a010df8
parent 317e2c857e
14 changed files with 106 additions and 42 deletions
--- a/modules/models.py
+++ b/modules/models.py
@ -7,7 +7,12 @@ from pathlib import Path

 import torch
 import transformers
-from accelerate import infer_auto_device_map, init_empty_weights
+from accelerate import (
+    infer_auto_device_map,
+    init_empty_weights,
+    is_ccl_available,
+    is_xpu_available
+)
 from transformers import (
    AutoConfig,
    AutoModel,
@ -38,8 +43,12 @@ if shared.args.deepspeed:
    # Distributed setup
    local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))
-    torch.cuda.set_device(local_rank)
-    deepspeed.init_distributed()
+    if is_xpu_available() and is_ccl_available():
+        torch.xpu.set_device(local_rank)
+        deepspeed.init_distributed(backend="ccl")
+    else:
+        torch.cuda.set_device(local_rank)
+        deepspeed.init_distributed()
    ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir)
    dschf = HfDeepSpeedConfig(ds_config)  # Keep this object alive for the Transformers integration

@ -137,8 +146,9 @@ def huggingface_loader(model_name):
        if torch.backends.mps.is_available():
            device = torch.device('mps')
            model = model.to(device)
-        elif hasattr(torch, 'xpu') and torch.xpu.is_available():
-            model = model.to('xpu')
+        elif is_xpu_available():
+            device = torch.device("xpu")
+            model = model.to(device)
        else:
            model = model.cuda()

@ -151,15 +161,10 @@ def huggingface_loader(model_name):

    # Load with quantization and/or offloading
    else:
-        conditions = [
-            shared.args.cpu,
-            torch.cuda.is_available(),
-            torch.backends.mps.is_available(),
-            hasattr(torch, 'xpu') and torch.xpu.is_available(),
-        ]

-        if not any(conditions):
-            logger.warning('No GPU has been detected by Pytorch. Falling back to CPU mode.')
+        if not any((shared.args.cpu, torch.cuda.is_available(), is_xpu_available(), torch.backends.mps.is_available())):
+            logger.warning('torch.cuda.is_available() and is_xpu_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.')
+
            shared.args.cpu = True

        if shared.args.cpu:
@ -362,7 +367,12 @@ def RWKV_loader(model_name):
    '''
    from modules.RWKV import RWKVModel, RWKVTokenizer

-    model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda")
+    model = RWKVModel.from_pretrained(
+        Path(f'{shared.args.model_dir}/{model_name}'),
+        dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16",
+        device="cpu" if shared.args.cpu else "xpu" if is_xpu_available() else "cuda"
+    )
+
    tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir))
    return model, tokenizer

@ -380,7 +390,10 @@ def get_max_memory_dict():
    # If --auto-devices is provided standalone, try to get a reasonable value
    # for the maximum memory of device :0
    elif shared.args.auto_devices:
-        total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
+        if is_xpu_available():
+            total_mem = (torch.xpu.get_device_properties(0).total_memory / (1024 * 1024))
+        else:
+            total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
        suggestion = round((total_mem - 1000) / 1000) * 1000
        if total_mem - suggestion < 800:
            suggestion -= 1000
@ -395,7 +408,10 @@ def get_max_memory_dict():
 def clear_torch_cache():
    gc.collect()
    if not shared.args.cpu:
-        torch.cuda.empty_cache()
+        if is_xpu_available():
+            torch.xpu.empty_cache()
+        else:
+            torch.cuda.empty_cache()


 def unload_model():