Add --num_experts_per_token parameter (ExLlamav2) (#4955)

oobabooga 2023-12-17 12:08:33 -03:00 committed by GitHub
parent 12690d3ffc
commit f1f2c4c3f4
7 changed files with 28 additions and 20 deletions

@@ -65,6 +65,18 @@ loaders_and_params = OrderedDict({
         'logits_all',
         'llamacpp_HF_info',
     ],
+    'ExLlamav2_HF': [
+        'gpu_split',
+        'max_seq_len',
+        'cfg_cache',
+        'no_flash_attn',
+        'num_experts_per_token',
+        'cache_8bit',
+        'alpha_value',
+        'compress_pos_emb',
+        'trust_remote_code',
+        'no_use_fast',
+    ],
     'ExLlama_HF': [
         'gpu_split',
         'max_seq_len',
@@ -75,17 +87,6 @@ loaders_and_params = OrderedDict({
         'trust_remote_code',
         'no_use_fast',
     ],
-    'ExLlamav2_HF': [
-        'gpu_split',
-        'max_seq_len',
-        'cfg_cache',
-        'no_flash_attn',
-        'cache_8bit',
-        'alpha_value',
-        'compress_pos_emb',
-        'trust_remote_code',
-        'no_use_fast',
-    ],
     'AutoGPTQ': [
         'triton',
         'no_inject_fused_attention',
@@ -123,6 +124,16 @@ loaders_and_params = OrderedDict({
         'no_use_fast',
         'gptq_for_llama_info',
     ],
+    'ExLlamav2': [
+        'gpu_split',
+        'max_seq_len',
+        'no_flash_attn',
+        'num_experts_per_token',
+        'cache_8bit',
+        'alpha_value',
+        'compress_pos_emb',
+        'exllamav2_info',
+    ],
     'ExLlama': [
         'gpu_split',
         'max_seq_len',
@@ -131,15 +142,6 @@ loaders_and_params = OrderedDict({
         'compress_pos_emb',
         'exllama_info',
     ],
-    'ExLlamav2': [
-        'gpu_split',
-        'max_seq_len',
-        'no_flash_attn',
-        'cache_8bit',
-        'alpha_value',
-        'compress_pos_emb',
-        'exllamav2_info',
-    ],
     'ctransformers': [
         'n_ctx',
         'n_gpu_layers',
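For context: the 'num_experts_per_token' entries above only surface the option in the web UI for the ExLlamav2 and ExLlamav2_HF loaders. The flag itself would be registered as a command-line argument and copied onto the backend config in the commit's other changed files, which are not shown here. Below is a minimal sketch of that wiring; the default of 2 (matching Mixtral's top-2 expert routing) and the ExLlamaV2Config attribute name are assumptions, not taken from this diff.

    import argparse

    # Sketch of the CLI flag that backs the new UI option (assumed wiring;
    # the commit's other changed files are not shown above).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--num_experts_per_token', type=int, default=2,
        help='Number of experts to use during generation for MoE models '
             'such as Mixtral (default of 2 is an assumption here).'
    )
    args = parser.parse_args([])  # empty list: use defaults for demonstration

    # The ExLlamav2 loader would then copy the value onto the model config,
    # roughly like this (treat the attribute name as an assumption):
    #
    #     from exllamav2 import ExLlamaV2Config
    #     config = ExLlamaV2Config()
    #     config.model_dir = 'models/my-moe-model'
    #     config.prepare()
    #     config.num_experts_per_token = args.num_experts_per_token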