Intel Gpu support initialization (#4340)
This commit is contained in:
parent
317e2c857e
commit
778a010df8
14 changed files with 106 additions and 42 deletions
|
@ -5,15 +5,15 @@ from pathlib import Path
|
|||
import accelerate
|
||||
import torch
|
||||
import transformers
|
||||
from accelerate import is_xpu_available
|
||||
from gptq_for_llama import llama_inference_offload
|
||||
from gptq_for_llama.modelutils import find_layers
|
||||
from gptq_for_llama.quant import make_quant
|
||||
from transformers import AutoConfig, AutoModelForCausalLM
|
||||
|
||||
import modules.shared as shared
|
||||
from modules.logging_colors import logger
|
||||
|
||||
from gptq_for_llama import llama_inference_offload
|
||||
from gptq_for_llama.modelutils import find_layers
|
||||
from gptq_for_llama.quant import make_quant
|
||||
|
||||
|
||||
# This function is a replacement for the load_quant function in the
|
||||
# GPTQ-for-LLaMa repository. It supports more models and branches.
|
||||
|
@ -144,7 +144,7 @@ def load_quantized(model_name):
|
|||
model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold)
|
||||
|
||||
# accelerate offload (doesn't work properly)
|
||||
if shared.args.gpu_memory or torch.cuda.device_count() > 1:
|
||||
if shared.args.gpu_memory or torch.cuda.device_count() > 1 or (is_xpu_available() and torch.xpu.device_count() > 1):
|
||||
if shared.args.gpu_memory:
|
||||
memory_map = list(map(lambda x: x.strip(), shared.args.gpu_memory))
|
||||
max_cpu_memory = shared.args.cpu_memory.strip() if shared.args.cpu_memory is not None else '99GiB'
|
||||
|
@ -163,6 +163,9 @@ def load_quantized(model_name):
|
|||
|
||||
# No offload
|
||||
elif not shared.args.cpu:
|
||||
model = model.to(torch.device('cuda:0'))
|
||||
if is_xpu_available():
|
||||
model = model.to(torch.device("xpu:0"))
|
||||
else:
|
||||
model = model.to(torch.device('cuda:0'))
|
||||
|
||||
return model
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue