Intel Gpu support initialization (#4340)
This commit is contained in:
parent
317e2c857e
commit
778a010df8
14 changed files with 106 additions and 42 deletions
|
@ -7,7 +7,12 @@ from pathlib import Path
|
|||
|
||||
import torch
|
||||
import transformers
|
||||
from accelerate import infer_auto_device_map, init_empty_weights
|
||||
from accelerate import (
|
||||
infer_auto_device_map,
|
||||
init_empty_weights,
|
||||
is_ccl_available,
|
||||
is_xpu_available
|
||||
)
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModel,
|
||||
|
@ -38,8 +43,12 @@ if shared.args.deepspeed:
|
|||
# Distributed setup
|
||||
local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
|
||||
world_size = int(os.getenv("WORLD_SIZE", "1"))
|
||||
torch.cuda.set_device(local_rank)
|
||||
deepspeed.init_distributed()
|
||||
if is_xpu_available() and is_ccl_available():
|
||||
torch.xpu.set_device(local_rank)
|
||||
deepspeed.init_distributed(backend="ccl")
|
||||
else:
|
||||
torch.cuda.set_device(local_rank)
|
||||
deepspeed.init_distributed()
|
||||
ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir)
|
||||
dschf = HfDeepSpeedConfig(ds_config) # Keep this object alive for the Transformers integration
|
||||
|
||||
|
@ -137,8 +146,9 @@ def huggingface_loader(model_name):
|
|||
if torch.backends.mps.is_available():
|
||||
device = torch.device('mps')
|
||||
model = model.to(device)
|
||||
elif hasattr(torch, 'xpu') and torch.xpu.is_available():
|
||||
model = model.to('xpu')
|
||||
elif is_xpu_available():
|
||||
device = torch.device("xpu")
|
||||
model = model.to(device)
|
||||
else:
|
||||
model = model.cuda()
|
||||
|
||||
|
@ -151,15 +161,10 @@ def huggingface_loader(model_name):
|
|||
|
||||
# Load with quantization and/or offloading
|
||||
else:
|
||||
conditions = [
|
||||
shared.args.cpu,
|
||||
torch.cuda.is_available(),
|
||||
torch.backends.mps.is_available(),
|
||||
hasattr(torch, 'xpu') and torch.xpu.is_available(),
|
||||
]
|
||||
|
||||
if not any(conditions):
|
||||
logger.warning('No GPU has been detected by Pytorch. Falling back to CPU mode.')
|
||||
if not any((shared.args.cpu, torch.cuda.is_available(), is_xpu_available(), torch.backends.mps.is_available())):
|
||||
logger.warning('torch.cuda.is_available() and is_xpu_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.')
|
||||
|
||||
shared.args.cpu = True
|
||||
|
||||
if shared.args.cpu:
|
||||
|
@ -362,7 +367,12 @@ def RWKV_loader(model_name):
|
|||
'''
|
||||
from modules.RWKV import RWKVModel, RWKVTokenizer
|
||||
|
||||
model = RWKVModel.from_pretrained(Path(f'{shared.args.model_dir}/{model_name}'), dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16", device="cpu" if shared.args.cpu else "cuda")
|
||||
model = RWKVModel.from_pretrained(
|
||||
Path(f'{shared.args.model_dir}/{model_name}'),
|
||||
dtype="fp32" if shared.args.cpu else "bf16" if shared.args.bf16 else "fp16",
|
||||
device="cpu" if shared.args.cpu else "xpu" if is_xpu_available() else "cuda"
|
||||
)
|
||||
|
||||
tokenizer = RWKVTokenizer.from_pretrained(Path(shared.args.model_dir))
|
||||
return model, tokenizer
|
||||
|
||||
|
@ -380,7 +390,10 @@ def get_max_memory_dict():
|
|||
# If --auto-devices is provided standalone, try to get a reasonable value
|
||||
# for the maximum memory of device :0
|
||||
elif shared.args.auto_devices:
|
||||
total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
|
||||
if is_xpu_available():
|
||||
total_mem = (torch.xpu.get_device_properties(0).total_memory / (1024 * 1024))
|
||||
else:
|
||||
total_mem = (torch.cuda.get_device_properties(0).total_memory / (1024 * 1024))
|
||||
suggestion = round((total_mem - 1000) / 1000) * 1000
|
||||
if total_mem - suggestion < 800:
|
||||
suggestion -= 1000
|
||||
|
@ -395,7 +408,10 @@ def get_max_memory_dict():
|
|||
def clear_torch_cache():
|
||||
gc.collect()
|
||||
if not shared.args.cpu:
|
||||
torch.cuda.empty_cache()
|
||||
if is_xpu_available():
|
||||
torch.xpu.empty_cache()
|
||||
else:
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def unload_model():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue