From 3c076c3c8096fa83440d701ba4d7d49606aaf61f Mon Sep 17 00:00:00 2001
From: ardfork <134447697+ardfork@users.noreply.github.com>
Date: Thu, 29 Jun 2023 18:03:16 +0000
Subject: [PATCH] Disable half2 for ExLlama when using HIP (#2912)

---
 modules/exllama.py    | 8 ++++++++
 modules/exllama_hf.py | 5 +++++
 2 files changed, 13 insertions(+)

diff --git a/modules/exllama.py b/modules/exllama.py
index a9ed8e1..0d16f4d 100644
--- a/modules/exllama.py
+++ b/modules/exllama.py
@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from torch import version as torch_version
+
 from modules import shared
 from modules.logging_colors import logger
 
@@ -51,6 +53,12 @@ class ExllamaModel:
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
+        if torch_version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
+
 
         model = ExLlama(config)
         tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
diff --git a/modules/exllama_hf.py b/modules/exllama_hf.py
index 9beb226..181a77a 100644
--- a/modules/exllama_hf.py
+++ b/modules/exllama_hf.py
@@ -97,6 +97,11 @@ class ExllamaHF(PreTrainedModel):
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
+        if torch.version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
 
         # This slowes down a bit but align better with autogptq generation.
         # TODO: Should give user choice to tune the exllama config