Load llamacpp before quantized model (#1307)
This commit is contained in:
parent
3961f49524
commit
07de7d0426
1 changed file with 10 additions and 10 deletions
|
@@ -99,6 +99,16 @@ def load_model(model_name):
|
||||||
|
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
|
# llamacpp model
|
||||||
|
elif shared.is_llamacpp:
|
||||||
|
from modules.llamacpp_model_alternative import LlamaCppModel
|
||||||
|
|
||||||
|
model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
|
||||||
|
print(f"llama.cpp weights detected: {model_file}\n")
|
||||||
|
|
||||||
|
model, tokenizer = LlamaCppModel.from_pretrained(model_file)
|
||||||
|
return model, tokenizer
|
||||||
|
|
||||||
# Quantized model
|
# Quantized model
|
||||||
elif shared.args.wbits > 0:
|
elif shared.args.wbits > 0:
|
||||||
|
|
||||||
|
@@ -116,16 +126,6 @@ def load_model(model_name):
|
||||||
|
|
||||||
model = load_quantized(model_name)
|
model = load_quantized(model_name)
|
||||||
|
|
||||||
# llamacpp model
|
|
||||||
elif shared.is_llamacpp:
|
|
||||||
from modules.llamacpp_model_alternative import LlamaCppModel
|
|
||||||
|
|
||||||
model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
|
|
||||||
print(f"llama.cpp weights detected: {model_file}\n")
|
|
||||||
|
|
||||||
model, tokenizer = LlamaCppModel.from_pretrained(model_file)
|
|
||||||
return model, tokenizer
|
|
||||||
|
|
||||||
# Custom
|
# Custom
|
||||||
else:
|
else:
|
||||||
params = {"low_cpu_mem_usage": True}
|
params = {"low_cpu_mem_usage": True}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue