Add ExLlama support (#2444)

This commit is contained in:
oobabooga 2023-06-16 20:35:38 -03:00 committed by GitHub
parent dea43685b0
commit 9f40032d32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 156 additions and 47 deletions

View file

@ -48,7 +48,8 @@ def load_model(model_name, loader=None):
'GPTQ-for-LLaMa': GPTQ_loader,
'llama.cpp': llamacpp_loader,
'FlexGen': flexgen_loader,
'RWKV': RWKV_loader
'RWKV': RWKV_loader,
'ExLlama': ExLlama_loader
}
if loader is None:
@ -270,6 +271,13 @@ def AutoGPTQ_loader(model_name):
return modules.AutoGPTQ_loader.load_quantized(model_name)
def ExLlama_loader(model_name):
    """Load ``model_name`` through the ExLlama backend.

    Delegates to ``ExllamaModel.from_pretrained`` and returns the
    ``(model, tokenizer)`` pair it produces.
    """
    # Function-scope import: modules.exllama is only imported when this
    # loader is actually called.
    from modules.exllama import ExllamaModel

    exllama_model, exllama_tokenizer = ExllamaModel.from_pretrained(model_name)
    return exllama_model, exllama_tokenizer
def get_max_memory_dict():
max_memory = {}
if shared.args.gpu_memory: