Re-implement --load-in-4bit and update --llama-bits arg description

2023-03-10 23:21:01 +00:00 · 2023-03-10 23:21:01 +00:00 · 804486214b
commit 804486214b
parent 9ba8156a70
3 changed files with 10 additions and 4 deletions
--- a/modules/models.py
+++ b/modules/models.py
@@ -88,9 +88,13 @@ def load_model(model_name):
        return model, tokenizer

    # 4-bit LLaMA
-    elif shared.args.llama_bits>0:
+    elif shared.args.llama_bits>0 or shared.args.load_in_4bit:
        sys.path.insert(0, os.path.abspath(Path("repositories/GPTQ-for-LLaMa")))
-        bits = shared.args.llama_bits
+        if shared.args.load_in_4bit:
+            bits = 4
+        else:
+            bits = shared.args.llama_bits
+        

        from llama import load_quant