Add ExLlamaV2 and ExLlamav2_HF loaders (#3881)

This commit is contained in:
oobabooga 2023-09-12 14:33:07 -03:00 committed by GitHub
parent a821928877
commit c2a309f56e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 295 additions and 5 deletions

View file

@ -59,6 +59,8 @@ def load_model(model_name, loader=None):
'RWKV': RWKV_loader,
'ExLlama': ExLlama_loader,
'ExLlama_HF': ExLlama_HF_loader,
'ExLlamav2': ExLlamav2_loader,
'ExLlamav2_HF': ExLlamav2_HF_loader,
'ctransformers': ctransformers_loader,
}
@ -329,6 +331,19 @@ def ExLlama_HF_loader(model_name):
return ExllamaHF.from_pretrained(model_name)
def ExLlamav2_loader(model_name):
    """Load a model and its tokenizer through ``Exllamav2Model``.

    Args:
        model_name: Identifier handed unchanged to
            ``Exllamav2Model.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple built from the two values that
        ``Exllamav2Model.from_pretrained`` produces.
    """
    # The import is performed at call time rather than at module import;
    # presumably so the backend is only required when this loader is
    # actually selected -- confirm against the project's packaging.
    from modules.exllamav2 import Exllamav2Model

    # Unpacking into exactly two names enforces the two-element contract
    # before the pair is handed back to the caller.
    loaded_model, loaded_tokenizer = Exllamav2Model.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer
def ExLlamav2_HF_loader(model_name):
    """Load a model through ``Exllamav2HF``.

    Args:
        model_name: Identifier handed unchanged to
            ``Exllamav2HF.from_pretrained``.

    Returns:
        Whatever ``Exllamav2HF.from_pretrained`` returns for ``model_name``.
    """
    # The import is performed at call time rather than at module import;
    # presumably so the backend is only required when this loader is
    # actually selected -- confirm against the project's packaging.
    from modules.exllamav2_hf import Exllamav2HF

    hf_model = Exllamav2HF.from_pretrained(model_name)
    return hf_model
def get_max_memory_dict():
max_memory = {}
if shared.args.gpu_memory: