Load llamacpp before quantized model (#1307)

2023-04-17 06:47:26 -07:00 · 2023-04-17 06:47:26 -07:00 · 07de7d0426
commit 07de7d0426
parent 3961f49524
1 changed file with 10 additions and 10 deletions
--- a/modules/models.py
+++ b/modules/models.py
@@ -99,6 +99,16 @@ def load_model(model_name):
        return model, tokenizer
    # llamacpp model
    elif shared.is_llamacpp:
        from modules.llamacpp_model_alternative import LlamaCppModel
        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
        print(f"llama.cpp weights detected: {model_file}\n")
        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
        return model, tokenizer
    # Quantized model
    elif shared.args.wbits > 0:
@@ -116,16 +126,6 @@ def load_model(model_name):
            model = load_quantized(model_name)
    # llamacpp model
    elif shared.is_llamacpp:
        from modules.llamacpp_model_alternative import LlamaCppModel
        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
        print(f"llama.cpp weights detected: {model_file}\n")
        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
        return model, tokenizer
    # Custom
    else:
        params = {"low_cpu_mem_usage": True}