Re-implement --load-in-4bit and update --llama-bits arg description

This commit is contained in:
draff 2023-03-10 23:21:01 +00:00
parent 9ba8156a70
commit 804486214b
3 changed files with 10 additions and 4 deletions

View file

@@ -88,9 +88,13 @@ def load_model(model_name):
return model, tokenizer
# 4-bit LLaMA
elif shared.args.llama_bits>0:
elif shared.args.llama_bits>0 or shared.args.load_in_4bit:
sys.path.insert(0, os.path.abspath(Path("repositories/GPTQ-for-LLaMa")))
bits = shared.args.llama_bits
if shared.args.load_in_4bit:
bits = 4
else:
bits = shared.args.llama_bits
from llama import load_quant