From 3c076c3c8096fa83440d701ba4d7d49606aaf61f Mon Sep 17 00:00:00 2001 From: ardfork <134447697+ardfork@users.noreply.github.com> Date: Thu, 29 Jun 2023 18:03:16 +0000 Subject: [PATCH] Disable half2 for ExLlama when using HIP (#2912) --- modules/exllama.py | 8 ++++++++ modules/exllama_hf.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/modules/exllama.py b/modules/exllama.py index a9ed8e1..0d16f4d 100644 --- a/modules/exllama.py +++ b/modules/exllama.py @@ -1,6 +1,8 @@ import sys from pathlib import Path +from torch import version as torch_version + from modules import shared from modules.logging_colors import logger @@ -51,6 +53,12 @@ class ExllamaModel: if shared.args.gpu_split: config.set_auto_map(shared.args.gpu_split) config.gpu_peer_fix = True + if torch_version.hip: + config.rmsnorm_no_half2 = True + config.rope_no_half2 = True + config.matmul_no_half2 = True + config.silu_no_half2 = True + model = ExLlama(config) tokenizer = ExLlamaTokenizer(str(tokenizer_model_path)) diff --git a/modules/exllama_hf.py b/modules/exllama_hf.py index 9beb226..181a77a 100644 --- a/modules/exllama_hf.py +++ b/modules/exllama_hf.py @@ -97,6 +97,11 @@ class ExllamaHF(PreTrainedModel): if shared.args.gpu_split: config.set_auto_map(shared.args.gpu_split) config.gpu_peer_fix = True + if torch.version.hip: + config.rmsnorm_no_half2 = True + config.rope_no_half2 = True + config.matmul_no_half2 = True + config.silu_no_half2 = True # This slowes down a bit but align better with autogptq generation. # TODO: Should give user choice to tune the exllama config