Re-implement --load-in-4bit and update --llama-bits arg description
This commit is contained in:
parent
9ba8156a70
commit
804486214b
3 changed files with 10 additions and 4 deletions
|
@@ -88,9 +88,13 @@ def load_model(model_name):
|
|||
return model, tokenizer
|
||||
|
||||
# 4-bit LLaMA
|
||||
elif shared.args.llama_bits>0:
|
||||
elif shared.args.llama_bits>0 or shared.args.load_in_4bit:
|
||||
sys.path.insert(0, os.path.abspath(Path("repositories/GPTQ-for-LLaMa")))
|
||||
bits = shared.args.llama_bits
|
||||
if shared.args.load_in_4bit:
|
||||
bits = 4
|
||||
else:
|
||||
bits = shared.args.llama_bits
|
||||
|
||||
|
||||
from llama import load_quant
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue