Add 4-bit LoRA support (#1200)

This commit is contained in:
oobabooga 2023-04-16 23:26:52 -03:00 committed by GitHub
parent ec3e869c27
commit 39099663a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 100 additions and 34 deletions

View file

@@ -101,9 +101,20 @@ def load_model(model_name):
# Quantized model
elif shared.args.wbits > 0:
from modules.GPTQ_loader import load_quantized
model = load_quantized(model_name)
# Monkey patch
if shared.args.monkey_patch:
print("Warning: applying the monkey patch for using LoRAs in 4-bit mode.\nIt may cause undefined behavior outside its intended scope.")
from modules.monkey_patch_gptq_lora import load_model_llama
model, tokenizer = load_model_llama(model_name)
return model, tokenizer
# No monkey patch
else:
from modules.GPTQ_loader import load_quantized
model = load_quantized(model_name)
# llamacpp model
elif shared.is_llamacpp: