Add ExLlamaV2 and ExLlamav2_HF loaders (#3881)
This commit is contained in:
parent
a821928877
commit
c2a309f56e
9 changed files with 295 additions and 5 deletions
|
@ -59,6 +59,8 @@ def load_model(model_name, loader=None):
|
|||
'RWKV': RWKV_loader,
|
||||
'ExLlama': ExLlama_loader,
|
||||
'ExLlama_HF': ExLlama_HF_loader,
|
||||
'ExLlamav2': ExLlamav2_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ctransformers': ctransformers_loader,
|
||||
}
|
||||
|
||||
|
@ -329,6 +331,19 @@ def ExLlama_HF_loader(model_name):
|
|||
return ExllamaHF.from_pretrained(model_name)
|
||||
|
||||
|
||||
def ExLlamav2_loader(model_name):
    """Load *model_name* with the ExLlamaV2 backend.

    Imports lazily so the exllamav2 dependency is only required when
    this loader is actually selected. Returns a (model, tokenizer) pair.
    """
    from modules.exllamav2 import Exllamav2Model

    loaded_model, loaded_tokenizer = Exllamav2Model.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer
|
||||
|
||||
|
||||
def ExLlamav2_HF_loader(model_name):
    """Load *model_name* through the ExLlamaV2 HF-compatible wrapper.

    Lazy import keeps the exllamav2 dependency optional; delegates
    construction entirely to Exllamav2HF.from_pretrained.
    """
    from modules.exllamav2_hf import Exllamav2HF

    wrapped = Exllamav2HF.from_pretrained(model_name)
    return wrapped
|
||||
|
||||
|
||||
def get_max_memory_dict():
|
||||
max_memory = {}
|
||||
if shared.args.gpu_memory:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue