Intel Gpu support initialization (#4340)

This commit is contained in:
Abhilash Majumder 2023-10-27 08:09:51 +05:30 committed by GitHub
parent 317e2c857e
commit 778a010df8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 106 additions and 42 deletions

View file

@ -5,15 +5,15 @@ from pathlib import Path
import accelerate
import torch
import transformers
from accelerate import is_xpu_available
from gptq_for_llama import llama_inference_offload
from gptq_for_llama.modelutils import find_layers
from gptq_for_llama.quant import make_quant
from transformers import AutoConfig, AutoModelForCausalLM
import modules.shared as shared
from modules.logging_colors import logger
from gptq_for_llama import llama_inference_offload
from gptq_for_llama.modelutils import find_layers
from gptq_for_llama.quant import make_quant
# This function is a replacement for the load_quant function in the
# GPTQ-for_LLaMa repository. It supports more models and branches.
@ -144,7 +144,7 @@ def load_quantized(model_name):
model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, kernel_switch_threshold=threshold)
# accelerate offload (doesn't work properly)
if shared.args.gpu_memory or torch.cuda.device_count() > 1:
if shared.args.gpu_memory or torch.cuda.device_count() > 1 or (is_xpu_available() and torch.xpu.device_count() > 1):
if shared.args.gpu_memory:
memory_map = list(map(lambda x: x.strip(), shared.args.gpu_memory))
max_cpu_memory = shared.args.cpu_memory.strip() if shared.args.cpu_memory is not None else '99GiB'
@ -163,6 +163,9 @@ def load_quantized(model_name):
# No offload
elif not shared.args.cpu:
model = model.to(torch.device('cuda:0'))
if is_xpu_available():
model = model.to(torch.device("xpu:0"))
else:
model = model.to(torch.device('cuda:0'))
return model