Add ExLlama support (#2444)

2023-06-16 20:35:38 -03:00 · 2023-06-16 20:35:38 -03:00 · 9f40032d32
commit 9f40032d32
parent dea43685b0
12 changed files with 156 additions and 47 deletions
--- a/modules/models.py
+++ b/modules/models.py
@@ -48,7 +48,8 @@ def load_model(model_name, loader=None):
        'GPTQ-for-LLaMa': GPTQ_loader,
        'llama.cpp': llamacpp_loader,
        'FlexGen': flexgen_loader,
-        'RWKV': RWKV_loader
+        'RWKV': RWKV_loader,
+        'ExLlama': ExLlama_loader
    }

    if loader is None:
@@ -270,6 +271,13 @@ def AutoGPTQ_loader(model_name):
    return modules.AutoGPTQ_loader.load_quantized(model_name)


+def ExLlama_loader(model_name):  # Load `model_name` through ExllamaModel.from_pretrained; returns a (model, tokenizer) pair.
+    from modules.exllama import ExllamaModel  # function-scope import: modules.exllama is only imported when this loader is called
+
+    model, tokenizer = ExllamaModel.from_pretrained(model_name)  # unpacking requires from_pretrained to yield exactly two values
+    return model, tokenizer
+
+
 def get_max_memory_dict():
    max_memory = {}
    if shared.args.gpu_memory: